First we get the metaviper predictions, LV scores, and Random Forest weights from Synapse. We filter for LVs that are selected by the random forest.
#get immune predictions
dtab<-synapser::synTableQuery(paste('select * from',mp_scores))$asDataFrame()%>%
subset(isCellLine!='TRUE')
##
[####################]100.00% 1/1 Done...
Downloading [##------------------]7.96% 2.0MB/25.1MB (3.5MB/s) Job-103108778937140675737338019.csv
Downloading [###-----------------]15.92% 4.0MB/25.1MB (5.1MB/s) Job-103108778937140675737338019.csv
Downloading [#####---------------]23.88% 6.0MB/25.1MB (6.1MB/s) Job-103108778937140675737338019.csv
Downloading [######--------------]31.85% 8.0MB/25.1MB (6.7MB/s) Job-103108778937140675737338019.csv
Downloading [########------------]39.81% 10.0MB/25.1MB (7.0MB/s) Job-103108778937140675737338019.csv
Downloading [##########----------]47.77% 12.0MB/25.1MB (7.4MB/s) Job-103108778937140675737338019.csv
Downloading [###########---------]55.73% 14.0MB/25.1MB (7.6MB/s) Job-103108778937140675737338019.csv
Downloading [#############-------]63.69% 16.0MB/25.1MB (7.8MB/s) Job-103108778937140675737338019.csv
Downloading [##############------]71.65% 18.0MB/25.1MB (8.0MB/s) Job-103108778937140675737338019.csv
Downloading [################----]79.61% 20.0MB/25.1MB (8.0MB/s) Job-103108778937140675737338019.csv
Downloading [##################--]87.58% 22.0MB/25.1MB (7.8MB/s) Job-103108778937140675737338019.csv
Downloading [###################-]95.54% 24.0MB/25.1MB (7.8MB/s) Job-103108778937140675737338019.csv
Downloading [####################]100.00% 25.1MB/25.1MB (7.7MB/s) Job-103108778937140675737338019.csv Done...
##get metaviper scores
mtab<-synapser::synTableQuery(paste('select * from',metaviper_scores))$asDataFrame()
##
Building the CSV... [#-------------------]3.28% 48152/1467984
Building the CSV... [#-------------------]7.43% 109077/1467984
Building the CSV... [###-----------------]16.36% 240110/1467984
Building the CSV... [#####---------------]24.91% 365629/1467984
Building the CSV... [######--------------]28.99% 425640/1467984
Building the CSV... [########------------]38.01% 558026/1467984
Create CSV FileHandle [##########----------]50.00% 733995/1467984
Create CSV FileHandle [####################]100.00% 1467984/1467984 Done...
Downloading [--------------------]2.47% 2.0MB/81.0MB (3.6MB/s) Job-103108782087876564897722437.csv
Downloading [#-------------------]4.94% 4.0MB/81.0MB (5.0MB/s) Job-103108782087876564897722437.csv
Downloading [#-------------------]7.40% 6.0MB/81.0MB (6.1MB/s) Job-103108782087876564897722437.csv
Downloading [##------------------]9.87% 8.0MB/81.0MB (6.8MB/s) Job-103108782087876564897722437.csv
Downloading [##------------------]12.34% 10.0MB/81.0MB (7.1MB/s) Job-103108782087876564897722437.csv
Downloading [###-----------------]14.81% 12.0MB/81.0MB (7.4MB/s) Job-103108782087876564897722437.csv
Downloading [###-----------------]17.27% 14.0MB/81.0MB (7.6MB/s) Job-103108782087876564897722437.csv
Downloading [####----------------]19.74% 16.0MB/81.0MB (7.8MB/s) Job-103108782087876564897722437.csv
Downloading [####----------------]22.21% 18.0MB/81.0MB (8.0MB/s) Job-103108782087876564897722437.csv
Downloading [#####---------------]24.68% 20.0MB/81.0MB (8.1MB/s) Job-103108782087876564897722437.csv
Downloading [#####---------------]27.14% 22.0MB/81.0MB (8.2MB/s) Job-103108782087876564897722437.csv
Downloading [######--------------]29.61% 24.0MB/81.0MB (8.4MB/s) Job-103108782087876564897722437.csv
Downloading [######--------------]32.08% 26.0MB/81.0MB (8.5MB/s) Job-103108782087876564897722437.csv
Downloading [#######-------------]34.55% 28.0MB/81.0MB (8.6MB/s) Job-103108782087876564897722437.csv
Downloading [#######-------------]37.01% 30.0MB/81.0MB (8.7MB/s) Job-103108782087876564897722437.csv
Downloading [########------------]39.48% 32.0MB/81.0MB (8.8MB/s) Job-103108782087876564897722437.csv
Downloading [########------------]41.95% 34.0MB/81.0MB (8.8MB/s) Job-103108782087876564897722437.csv
Downloading [#########-----------]44.42% 36.0MB/81.0MB (8.7MB/s) Job-103108782087876564897722437.csv
Downloading [#########-----------]46.88% 38.0MB/81.0MB (8.7MB/s) Job-103108782087876564897722437.csv
Downloading [##########----------]49.35% 40.0MB/81.0MB (8.7MB/s) Job-103108782087876564897722437.csv
Downloading [##########----------]51.82% 42.0MB/81.0MB (8.7MB/s) Job-103108782087876564897722437.csv
Downloading [###########---------]54.29% 44.0MB/81.0MB (8.8MB/s) Job-103108782087876564897722437.csv
Downloading [###########---------]56.76% 46.0MB/81.0MB (8.7MB/s) Job-103108782087876564897722437.csv
Downloading [############--------]59.22% 48.0MB/81.0MB (8.7MB/s) Job-103108782087876564897722437.csv
Downloading [############--------]61.69% 50.0MB/81.0MB (8.8MB/s) Job-103108782087876564897722437.csv
Downloading [#############-------]64.16% 52.0MB/81.0MB (8.8MB/s) Job-103108782087876564897722437.csv
Downloading [#############-------]66.63% 54.0MB/81.0MB (8.8MB/s) Job-103108782087876564897722437.csv
Downloading [##############------]69.09% 56.0MB/81.0MB (8.9MB/s) Job-103108782087876564897722437.csv
Downloading [##############------]71.56% 58.0MB/81.0MB (8.8MB/s) Job-103108782087876564897722437.csv
Downloading [###############-----]74.03% 60.0MB/81.0MB (8.9MB/s) Job-103108782087876564897722437.csv
Downloading [###############-----]76.50% 62.0MB/81.0MB (8.9MB/s) Job-103108782087876564897722437.csv
Downloading [################----]78.96% 64.0MB/81.0MB (9.0MB/s) Job-103108782087876564897722437.csv
Downloading [################----]81.43% 66.0MB/81.0MB (8.8MB/s) Job-103108782087876564897722437.csv
Downloading [#################---]83.90% 68.0MB/81.0MB (8.8MB/s) Job-103108782087876564897722437.csv
Downloading [#################---]86.37% 70.0MB/81.0MB (8.9MB/s) Job-103108782087876564897722437.csv
Downloading [##################--]88.83% 72.0MB/81.0MB (8.9MB/s) Job-103108782087876564897722437.csv
Downloading [##################--]91.30% 74.0MB/81.0MB (8.9MB/s) Job-103108782087876564897722437.csv
Downloading [###################-]93.77% 76.0MB/81.0MB (9.0MB/s) Job-103108782087876564897722437.csv
Downloading [###################-]96.24% 78.0MB/81.0MB (9.0MB/s) Job-103108782087876564897722437.csv
Downloading [####################]98.70% 80.0MB/81.0MB (9.0MB/s) Job-103108782087876564897722437.csv
Downloading [####################]100.00% 81.0MB/81.0MB (9.0MB/s) Job-103108782087876564897722437.csv Done...
##get rf loadings
rftab<-synapser::synTableQuery(paste('select * from',rf_mp))$asDataFrame()%>%
select(LV_Full,`Cutaneous Neurofibroma`,`Neurofibroma`,`Malignant Peripheral Nerve Sheath Tumor`,`Plexiform Neurofibroma`)%>%
mutate(latent_var=gsub('`','',LV_Full))%>%
select(-LV_Full)
##
[####################]100.00% 1/1 Done...
Downloading [####################]100.00% 79.8kB/79.8kB (546.9kB/s) Job-103108861380691698548384651.csv Done...
samps<-intersect(dtab$specimenID,mtab$specimenID)
#get RF-selected latent variables
lvs<-synTableQuery("select * from syn21315356")$asDataFrame()$LatentVar
##
[####################]100.00% 1/1 Done...
Downloading [####################]100.00% 2.2kB/2.2kB (2.8MB/s) Job-103108872313621925498032413.csv Done...
mp_res<-dtab%>%
subset(specimenID%in%samps)%>%
subset(latent_var%in%lvs)%>%
# group_by(latent_var) %>%
# mutate(sd_value = sd(value)) %>%
# filter(sd_value > 0.05) %>%
# ungroup()%>%
select(latent_var,value,specimenID,diagnosis)%>%
left_join(rftab,by='latent_var')
combined<-mp_res%>%ungroup()%>%inner_join(mtab,by='specimenID')
#now compute some basic stats
mp_stats<-mp_res%>%
rowwise()%>%mutate(MaxVal=max(`Cutaneous Neurofibroma`,`Plexiform Neurofibroma`,`Malignant Peripheral Nerve Sheath Tumor`,Neurofibroma))%>%
rowwise()%>%
mutate(MeanVal=mean(c(`Cutaneous Neurofibroma`,`Plexiform Neurofibroma`,`Malignant Peripheral Nerve Sheath Tumor`,Neurofibroma)))
DT::datatable(mp_stats)
With the RF-selected LVs for each random forest prediction, we can plot those metaviper proteins that correlate with them.
corVals=combined%>%#subset(latent_var%in%unique(unlist(top10)))%>%
group_by(latent_var,gene)%>%
summarize(corVal=cor(value,metaviperscore,use='pairwise.complete.obs'),numSamps=n_distinct(specimenID))
corVals
## # A tibble: 627,476 x 4
## # Groups: latent_var [103]
## latent_var gene corVal numSamps
## <chr> <chr> <dbl> <int>
## 1 1,REACTOME_MRNA_SPLICING AATF 0.436 77
## 2 1,REACTOME_MRNA_SPLICING ABCA1 -0.570 77
## 3 1,REACTOME_MRNA_SPLICING ABCC8 -0.327 77
## 4 1,REACTOME_MRNA_SPLICING ABCC9 -0.619 77
## 5 1,REACTOME_MRNA_SPLICING ABCG1 -0.521 77
## 6 1,REACTOME_MRNA_SPLICING ABCG4 0.239 77
## 7 1,REACTOME_MRNA_SPLICING ABI1 -0.356 77
## 8 1,REACTOME_MRNA_SPLICING ABL1 -0.0887 77
## 9 1,REACTOME_MRNA_SPLICING ABL2 -0.318 77
## 10 1,REACTOME_MRNA_SPLICING ABLIM3 -0.652 77
## # … with 627,466 more rows
##let's store this in Synapse
tab<-synBuildTable('Top10 Metaviper Latent-Variable Correlations',parent='syn21046734',corVals)
synStore(tab)
##
Uploading [--------------------]0.00% 0.0bytes/27.6MB filef9c923e9f5fb
Uploading [######--------------]28.96% 8.0MB/27.6MB (760.2kB/s) filef9c923e9f5fb
Uploading [############--------]57.92% 16.0MB/27.6MB (761.6kB/s) filef9c923e9f5fb
Uploading [#################---]86.89% 24.0MB/27.6MB (765.1kB/s) filef9c923e9f5fb
Uploading [####################]100.00% 27.6MB/27.6MB (684.6kB/s) filef9c923e9f5fb Done...
[####################]100.00% 1/1 Done...
## <synapseclient.table.CsvFileTable object at 0x11842d588>
#corVals<-corVals%>%subset(latent_var%in%unique(unlist(top10)))
##now how do we bracket them?
##plot correlation distributions by cell type and method.
require(ggplot2)
##first re-order variables to plot
top.df<-mp_stats%>%rename(All='MeanVal')%>%gather(key="disease",value="pathway")
p<-corVals%>%
ungroup()%>%
subset(latent_var%in%unique(unlist(lvs)))%>%
# mutate(LatentVariable = stringr::str_trim(as.character(latent_var), 20))%>%
ggplot()+geom_boxplot(aes(x=latent_var,y=corVal))+ theme(axis.text.x = element_text(angle = 45, hjust = 1))+ggtitle("Correlation of metaviper proteins with lv")
print(p)
There are some proteins that show up as highly correlated. By choosing a threshold, we can evaluate what they are in more detail.
These plots represent the top 10 latent variables for a predictor of each tumor type and the proteins that are correlated with them.
corthresh=0.75
##now filter to the cell types with correlated proteins
cor_cell_types=subset(corVals,corVal>corthresh)%>%
subset(latent_var%in%unique(unlist(lvs)))%>%
ungroup()%>%
select(latent_var)%>%
distinct()
print(paste('we found',nrow(cor_cell_types),'lvs with some protein correlation greater than',corthresh))
## [1] "we found 64 lvs with some protein correlation greater than 0.75"
DT::datatable(cor_cell_types)
apply(cor_cell_types,1,function(x){
ct=x[['latent_var']]
# m=x[['method']]
#for each gene and cell type
genes=subset(corVals,latent_var==ct)%>%
subset(corVal>corthresh)%>%
arrange(desc(corVal))%>%
ungroup()
if(nrow(genes)>12){
new.corthresh=format(genes$corVal[12],digits=3)
genes=genes[1:12,]
}else{
new.corthresh=corthresh
}
scores=subset(combined,gene%in%genes$gene)%>%subset(latent_var==ct)
p2<- ggplot(scores)+
geom_point(aes(x=value,y=metaviperscore,
col=gene,shape=tumorType))+
# scale_x_log10()+
ggtitle(paste(ct,'correlation >',new.corthresh,'\n',
subset(top.df,pathway==ct)%>%select(disease)%>%unlist()%>%paste(collapse=',')))
print(p2)
# ggsave(paste0(m,'predictions of',gsub(" ","",gsub("/","",ct)),'cor',new.corthresh,'.pdf'))
})
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
##
## [[15]]
##
## [[16]]
##
## [[17]]
##
## [[18]]
##
## [[19]]
##
## [[20]]
##
## [[21]]
##
## [[22]]
##
## [[23]]
##
## [[24]]
##
## [[25]]
##
## [[26]]
##
## [[27]]
##
## [[28]]
##
## [[29]]
##
## [[30]]
##
## [[31]]
##
## [[32]]
##
## [[33]]
##
## [[34]]
##
## [[35]]
##
## [[36]]
##
## [[37]]
##
## [[38]]
##
## [[39]]
##
## [[40]]
##
## [[41]]
##
## [[42]]
##
## [[43]]
##
## [[44]]
##
## [[45]]
##
## [[46]]
##
## [[47]]
##
## [[48]]
##
## [[49]]
##
## [[50]]
##
## [[51]]
##
## [[52]]
##
## [[53]]
##
## [[54]]
##
## [[55]]
##
## [[56]]
##
## [[57]]
##
## [[58]]
##
## [[59]]
##
## [[60]]
##
## [[61]]
##
## [[62]]
##
## [[63]]
##
## [[64]]
#parentid='syn20710537'
#for(fi in list.files('.')[grep('tions',list.files('.'))])
# synapser::synStore(synapser::File(fi,parentId=parentid,annotations=list(resourceType='analysis',isMultiSpecimen='TRUE',isMultiIndividual='TRUE')),used=c(deconv_scores,metaviper_scores),executed=this.script)